R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(glue)
## 
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
## 
##     collapse
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor

Including Plots

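You can also embed plots. The code below first summarises how often each rating occurs and converts the counts to probabilities; the plot itself is built in the Data Visualization section: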

## [1] 24058263        3
# Load the second ratings file. The file has no header row (header = FALSE).
# NOTE(review): `data2` is not used anywhere in this section -- presumably it
# is consumed by a later chunk; confirm.
data2 <- read.csv("data_input/combined_data_2.txt", header = FALSE, sep = ",")

# Count the rows for each distinct rating. `count()` does its own grouping and
# returns an ungrouped result, so no separate `group_by()` is needed and the
# summary does not stay grouped for the steps below.
# NOTE(review): `data` is not defined in this section -- assumed to be created
# by an earlier chunk and to contain a `rating` column; confirm.
data_summary <- data %>%
  count(rating)
data_summary

# `rating` has not been converted to a factor yet, so there are no levels.
levels(data_summary$rating)
## NULL

# Formatter that turns a proportion into a percentage string
# (0.25 -> "25.00%"). The previous `label_dollar(suffix = "%", prefix = "")`
# only appended "%" without multiplying by 100, so 0.25 was shown as "0.25%".
# The name shadows `scales::label_percent`; it is kept so that any other code
# calling `label_percent(x)` keeps working, and the explicit `scales::` prefix
# keeps this line re-runnable after the name has been shadowed.
label_percent <- scales::label_percent(accuracy = 0.01)

# Convert rating to a factor, turn counts into probabilities (share of all
# rows), and build the hover text used by the interactive plot.
data_summary <- data_summary %>%
  mutate(
    rating = as.factor(rating),
    prob = n / sum(n),
    tooltip = glue("probability: {label_percent(prob)}")
  )
data_summary

Data Visualization

# Column chart with one bar per rating; bar height and fill colour both encode
# `prob`. The `text` aesthetic is not drawn by ggplot2 itself -- it carries the
# pre-built tooltip string through to plotly.
data_sum_graph <- ggplot(
  data = data_summary,
  mapping = aes(x = rating, y = prob, text = tooltip, fill = prob)
) +
  geom_col(position = "identity") +
  scale_fill_gradient(low = "#e4333e", high = "#52171a") +
  ggtitle(
    "Probability Distribution of Movie Ratings",
    subtitle = "For data set 1"
  ) +
  xlab("Movie rating") +
  ylab("Probability") +
  theme_minimal()

# Render as an interactive widget, showing only the `text` aesthetic on hover.
ggplotly(data_sum_graph, tooltip = "text")